### Preliminary ----------------------------------------------------------------
# Load required libraries
library(cowplot)      # For combining plots with plot_grid()
library(fixest)       # For fixed effects intrumental variables models
library(ggrepel)      # For non-overlapping text labels in plots
library(modelsummary) # For creating regression tables
library(scales)       # For re-scaling plot axes
library(tidyverse)    # For data manipulation and visualization

# Set formatting options for regression tables
options("modelsummary_format_numeric_latex" = "plain")

# Load datasets
chamber_df <- read_csv("State Legislative Chamber Panel.csv")
cycle_df <- read_csv("Redistricting Cycles.csv")

### Create Figure D1 -----------------------------------------------------------
# Visualize relationship between seat ratio and congressional candidacies
# across different redistricting cycles
ggplot(cycle_df, aes(x = log_ratio, y = prop_ran)) +
  geom_point(alpha = 0.3) +
  geom_text_repel(data = filter(cycle_df,
                                log_ratio < 1 | (log_ratio > 5.3) |
                                  (prop_ran > 0.08 &
                                     redist_cycle == "Chambers in the 2010s") |
                                  prop_ran > 0.13),
                  aes(label = state_chamber)) +
  geom_smooth(method = "lm", se = FALSE, lty = "solid", color = "black") +
  facet_wrap(~redist_cycle) +
  labs(x = "Logged Ratio of State Legislative \n Seats to U.S. House Seats",
       y = "Percent of State Legislators that \n Ran for Congress") +
  scale_x_continuous(limits = c(0, 6)) +
  scale_y_continuous(labels = percent_format()) +
  theme_bw() +
  theme(legend.position = "bottom",
        panel.grid = element_blank())

# Calculate F-statistics for relationship strength by redistricting period
cycle_df %>%
  filter(redist_cycle == "Chambers in the 2000s") %>%
  lm(prop_ran ~ log_ratio, data = .) %>%
  summary()

cycle_df %>%
  filter(redist_cycle != "Chambers in the 2000s") %>%
  lm(prop_ran ~ log_ratio, data = .) %>%
  summary()


### Create Table 1 -------------------------------------------------------------
# Main regression analysis examining effect of seat ratio on bipartisanship
# using both reduced form (OLS) and instrumental variable (2SLS) approaches

# Rescale prop_ran variable to percentage points for easier interpretation
chamber_df <- chamber_df %>%
  mutate(pct_ran = 100 * prop_ran)

# ROLL CALL POLARIZATION MODELS (median_diff as DV)
# Reduced form: Direct effect of log_ratio on polarization

# Model 1: Roll call polarization, reduced form, no controls
iv_1 <- feols(median_diff ~ log_ratio
              | election_year + state_chamber,
              vcov = "cluster",
              data = chamber_df)

# Model 2: Roll call polarization, reduced form, with controls
iv_2 <- feols(median_diff ~ log_ratio
              + majority_party
              + prop_majority
              + median_dpvs
              + maj_pvs
              + min_pvs
              + pct_female
              + cosponsor_limit
              + unif_government
              | election_year + state_chamber,
              vcov = "cluster",
              data = chamber_df)

# Second stage: Effect of congressional candidacies (instrumented by log_ratio)

# Model 3: Roll call polarization, 2SLS, no controls
iv_3 <- feols(median_diff ~ 1
              | election_year + state_chamber
              | pct_ran ~ log_ratio,
              vcov = "cluster",
              data = chamber_df)

# Model 4: Roll call polarization, 2SLS, with controls
iv_4 <- feols(median_diff ~ 1
              + majority_party
              + prop_majority
              + median_dpvs
              + maj_pvs
              + min_pvs
              + pct_female
              + cosponsor_limit
              + unif_government
              | election_year + state_chamber |
                pct_ran ~ log_ratio,
              vcov = "cluster",
              data = chamber_df)

# BIPARTISAN COSPONSORSHIP MODELS (avg_opp_prop as DV)
# Reduced form: Direct effect of log_ratio on cosponsorship

# Model 5: Bipartisan cosponsorship, reduced form, no controls
iv_5 <- feols(avg_opp_prop ~ log_ratio
              | election_year + state_chamber,
              vcov = "cluster",
              data = chamber_df)

# Model 6: Bipartisan cosponsorship, reduced form, with controls
iv_6 <- feols(avg_opp_prop ~ log_ratio
              + majority_party
              + prop_majority
              + median_dpvs
              + maj_pvs
              + min_pvs
              + pct_female
              + cosponsor_limit
              + unif_government
              | election_year + state_chamber,
              vcov = "cluster",
              data = chamber_df)

# Second stage: Effect of congressional candidacies (instrumented by log_ratio)

# Model 7: Bipartisan cosponsorship, 2SLS, no controls
iv_7 <- feols(avg_opp_prop ~ 1
              | election_year + state_chamber
              | pct_ran ~ log_ratio,
              vcov = "cluster",
              data = chamber_df)

# Model 8: Bipartisan cosponsorship, 2SLS, with controls
iv_8 <- feols(avg_opp_prop ~ 1
              + majority_party
              + prop_majority
              + median_dpvs
              + maj_pvs
              + min_pvs
              + pct_female
              + cosponsor_limit
              + unif_government
              | election_year + state_chamber
              | pct_ran ~ log_ratio,
              vcov = "cluster",
              data = chamber_df)

# Generate Table 1: Order models to match paper presentation
# (Roll call models 5-8, then cosponsorship models 1-4)
modelsummary(list(iv_1, iv_2, iv_3, iv_4, iv_5, iv_6, iv_7, iv_8),
             output = "latex_tabular",
             coef_map = c("log_ratio" = "Seat Ratio (Logged)",
                          "fit_pct_ran" = "Pct. Ran for Congress (Instrumented)"
             ),
             gof_map = c("nobs"),
             fmt = 3,
             add_rows = data.frame(
               name = c("Model", "Controls", "State-Chamber FEs", "Election Year FEs"),
               model_1 = c("Reduced Form (OLS)", "N", "Y", "Y"),
               model_2 = c("Reduced Form (OLS)", "Y", "Y", "Y"),
               model_3 = c("Second Stage (2SLS)", "N", "Y", "Y"),
               model_4 = c("Second Stage (2SLS)", "Y", "Y", "Y"),
               model_5 = c("Reduced Form (OLS)", "N", "Y", "Y"),
               model_6 = c("Reduced Form (OLS)", "Y", "Y", "Y"),
               model_7 = c("Second Stage (2SLS)", "N", "Y", "Y"),
               model_8 = c("Second Stage (2SLS)", "Y", "Y", "Y")
             ),
             stars = c("*" = 0.10,
                       "**" = 0.05,
                       "***" = 0.01))

### Create Figure 3 ------------------------------------------------------------
# Visualize predicted effects of seat ratio on both outcome measures
# with marginal density plots

# Re-estimate models with explicit fixed effects for prediction
# (fixest predict() requires factors rather than piped FEs)
iv_plot1 <- feols(median_diff ~ log_ratio +
                    as.factor(election_year) + as.factor(state_chamber),
                  data = chamber_df)

iv_plot2 <- feols(avg_opp_prop ~ log_ratio +
                    as.factor(election_year) + as.factor(state_chamber),
                  data = chamber_df)

# Define sequence of seat ratios for prediction (on log scale)
ratio_seq <- c(log(2), log(6), log(18), log(55), log(155))

# Generate predictions for both outcomes at fixed year/chamber values
model_predictions <- predict(iv_plot1,
                             data.frame(log_ratio = ratio_seq,
                                        election_year = 2020,
                                        state_chamber = "TX upper"),
                             interval = "confidence") %>%
  rename(median_diff = fit)

model_predictions <- predict(iv_plot2,
                             data.frame(log_ratio = ratio_seq,
                                        election_year = 2020,
                                        state_chamber = "TX upper"),
                             interval = "confidence") %>%
  rename(avg_opp_prop = fit) %>%
  select(1) %>%
  bind_cols(model_predictions) %>%
  mutate(log_ratio = ratio_seq)

# Create roll call polarization plot
rc_plot <- ggplot(chamber_df, aes(x = log_ratio, y = median_diff)) +
  geom_point(data = model_predictions) +
  geom_line(data = model_predictions) +
  # Label example states at different polarization levels
  geom_text(data = data.frame(
    state = c("AZ", "IA", "HI"),
    log_ratio = c(log(220), log(220), log(220)),
    median_diff = c(2.699, 1.690, 0.743)
  ), aes(label = state)) +
  # Add reference lines for select predictions
  geom_segment(aes(x = log(2), xend = log(155), y = 2.62),
               lty = "dashed") +
  geom_segment(aes(x = log(18), xend = log(155), y = 1.700),
               lty = "dashed") +
  theme_bw() +
  theme(plot.margin = unit(c(0, 0, 0, 0), "cm"),
        axis.text.x.bottom = element_blank(),
        axis.ticks.x.bottom = element_blank(),
        axis.text.y.left = element_blank(),
        axis.ticks.y.left = element_blank(),
        axis.title.x.top = element_blank(),
        axis.title.y.right = element_blank()) +
  scale_x_continuous(
    breaks = ratio_seq,
    limits = c(log(2), log(250)),
    labels = function(x) round(exp(x), 1),
    position = "top",
    sec.axis = dup_axis()
  ) +
  scale_y_continuous(
    position = "right",
    limits = c(0, 3),
    sec.axis = dup_axis()) +
  labs(x = "Ratio of State Legislators \n to Congressional Seats",
       y = "Difference in NP Score \n Between Party Medians")

# Create bipartisan cosponsorship plot
cs_plot <- ggplot(chamber_df, aes(x = log_ratio, y = avg_opp_prop)) +
  geom_point(data = model_predictions) +
  geom_line(data = model_predictions) +
  # Label example states at different cosponsorship levels
  geom_text(data = data.frame(
    state = c("IL", "IN", "SC"),
    log_ratio = c(log(220), log(220), log(220)),
    avg_opp_prop = c(0.159, 0.298, 0.431)
  ), aes(label = state)) +
  # Add reference lines for select predictions
  geom_segment(aes(x = log(2), xend = log(155), y = 0.156),
               lty = "dashed") +
  geom_segment(aes(x = log(18), xend = log(155), y = 0.293),
               lty = "dashed") +
  theme_bw() +
  theme(plot.margin = unit(c(0, 0, 0, 0), "cm"),
        axis.text.x.bottom = element_blank(),
        axis.ticks.x.bottom = element_blank(),
        axis.text.y.left = element_blank(),
        axis.ticks.y.left = element_blank(),
        axis.title.x.top = element_blank(),
        axis.title.y.right = element_blank()) +
  scale_x_continuous(
    breaks = ratio_seq,
    limits = c(log(2), log(250)),
    labels = function(x) round(exp(x), 1),
    position = "top",
    sec.axis = dup_axis()
  ) +
  scale_y_continuous(
    position = "right",
    limits = c(0.15, 0.45),
    breaks = seq(0.15, 0.45, 0.05),
    labels = percent_format(),
    sec.axis = dup_axis()) +
  labs(x = "Ratio of State Legislators \n to Congressional Seats",
       y = "Proportion of Bipartisan \n Cosponsors (Average)")

# Create marginal density plots for x-axis (shared across both outcomes)
xdens <- ggplot(chamber_df, aes(x = log_ratio)) +
  geom_density(fill = "gray80", alpha = 0.5) +
  theme_void() +
  theme(plot.margin = unit(c(0, 0, 0, 0), "cm"))

# Create marginal density plot for roll call y-axis
ydens_rc <- ggplot(chamber_df, aes(x = median_diff)) +
  geom_density(fill = "gray80", alpha = 0.5) +
  coord_flip() +
  theme_void() +
  theme(plot.margin = unit(c(0, 0, 0, 0), "cm"))

# Create marginal density plot for cosponsorship y-axis
ydens_cs <- ggplot(chamber_df, aes(x = avg_opp_prop)) +
  geom_density(fill = "gray80", alpha = 0.5) +
  coord_flip() +
  theme_void() +
  theme(plot.margin = unit(c(0, 0, 0, 0), "cm"))

# Empty plot for top-right corner of combined plots
empty <- ggplot() + theme_void()

# Combine plots: main plot with marginal densities
rc_combined <- plot_grid(xdens, empty, rc_plot, ydens_rc,
                         ncol = 2, nrow = 2,
                         rel_widths = c(4, 1),
                         rel_heights = c(1, 4),
                         align = "hv")

cs_combined <- plot_grid(xdens, empty, cs_plot, ydens_cs,
                         ncol = 2, nrow = 2,
                         rel_widths = c(4, 1),
                         rel_heights = c(1, 4),
                         align = "hv")

# Display final combined figure with both outcomes side-by-side
plot_grid(rc_combined, cs_combined, nrow = 1)

